requires : spdiv, spinfo (spinfo is the file containing information about all species, spdiv is the collected data.)
require(naniar)
require(ggplot2)
require(grid)
require(VennDiagram)
require(visdat)
Note : this script is based on the Exploratoreis diversity dataset, which is read in with the script analysis_nonpublic.R. It reads in the two datasets spdiv and spinfo.
From cleaning, getting numbers : - 334700 species in the dataset - 28 different trophic levels - type = presenceabsence : plant pathogen, ant omnivore
# delete Plot_bexis (old plot names)
spdiv[, Plot_bexis := NULL]
# spdiv[, c("Dataversion", "DataID") := NULL]
spdiv <- merge(spdiv, spinfo, by = "Species", all.x=T)
# now I can remove the original datasets, so I save cpu:
rm(spinfo); gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 3360100 179.5 5033091 268.8 5033091 268.8
## Vcells 75180919 573.6 218782701 1669.2 355987468 2716.0
Set to presence-absence
spdiv[value != 0, value := 1]
Group to 18 trophic levels as needed for analysis. (see also info_diversity.ods)
# take out trophic levels which are not used in analysis
spdiv <- spdiv[!Trophic_level %in% c("detritivore", "protist.unknown", "soilfungi.other", "vert.herb", "protist.parasite.nonplant")]
# merge the trophic levels which are merged in analysis
spdiv[Trophic_level == "herbivore.snail", Trophic_level := "herbivore"]
spdiv[Trophic_level == "ant.omnivore", Trophic_level := "omnivore"]
spdiv[Trophic_level == "omnivore.snail", Trophic_level := "omnivore"]
spdiv[Trophic_level == "protist.mainly.bacterivore", Trophic_level := "protist.bacterivore"]
# unique(spdiv$Trophic_level)
show years
unique(spdiv[, .(Trophic_level, Year)])
length(unique(spdiv$Species)) # 309734 species in the dataset (incl. OTU)
Get overview of amount of species per trophic level
for(t in unique(spdiv$Trophic_level)){
print(c(t, length(unique(spdiv[Trophic_level == t, Species]))))
}
Store trophic levels in vector
trlevels <- sort(unique(spdiv$Trophic_level))
Years 2008 to 2016. Lichens and Bryophytes only 2009 (119 species, 137 plots)
| year | no. species | no. Plots |
|---|---|---|
| 2008 | 344 | 147 |
| 2009 | 463 | 150 |
| 2009 l | 119 | 150 |
| 2009 p | 344 | 150 |
| 2010 | 344 | 150 |
| 2011 | 344 | 150 |
| 2012 | 344 | 149 |
| 2013 | 344 | 149 |
| 2014 | 344 | 148 |
| 2015 | 344 | 150 |
| 2016 | 344 | 150 |
autotroph <- spdiv[Trophic_level == "autotroph"]
# how many species and how many plots were measured in the different years?
length(unique(autotroph[Year == "2008", Plot])) # Changed year and tested for all years for reporting in the table
## [1] 147
# get vector with lichen species to examine lichens separately
lichenspecies <- setdiff(unique(autotroph[Year == "2009", Species]), unique(autotroph[Year == "2008", Species]))
# examine lichens and plants separately - same number of plots and expected number of species?
length(unique(autotroph[!Species %in% lichenspecies, Species]))
## [1] 344
length(unique(autotroph[Species %in% lichenspecies, Plot]))
## [1] 150
Plots identities
# are the same plots missing in all years?
testplotpresence <- unique(autotroph[, .(Year, Plot)])
vis_miss(data.table::dcast(testplotpresence, Plot ~Year))
# no, different plots are missing.
rm(testplotpresence)
If a plant species is present in at least 1 year, it is considered to be present. Underlying assumption : plant species composition is quite stable, therefore it is representative to assess their presence in a large time span.
Test : check how different the species composition is if considering all years.
Assessing presence of plant species in all years. Lichen and Bryophyte species are taken directly from year 2009, as this is the only year.
plants <- autotroph[!Species %in% lichenspecies, ]
lichens <- autotroph[Species %in% lichenspecies,]
# sum(length(unique(plants$Species)), length(unique(lichens$Species))) # check if the sum of species is 463 to be sure to have all autotroph species
# calculate the mean of all value (species presence/ absence). If it is not zero, the species was present in at least one year and the value can be set to 1.
plants <- aggregate(value ~ Species + Plot, plants, function(x) mean(x, na.rm=T))
plants <- data.table::data.table(plants)
# nrow(plants) == 344*150 # TRUE : there are 344 species per plot in the new dataset
# Check how many plant species have been present / absent in how many years
hist(plants$value, main="Presence of plant species among years", sub="The plot indicates if a plant species was either present in no year, \n in 1 years , ... , in all 9 years. Most species are never present", xlab = "")
# set the numbers which are not 0 or 1 to 1 (if value > 0, that means that plant has been present at least in one year)
plants[value != 0, value := 1]
# add the lichens dataset to the plants dataset
autotroph <- rbind(plants, lichens[, .(Species, Plot, value)])
nrow(autotroph) == 150 * 344 + 150 * 119 # TRUE : there are 119 lichen and 344 plant species in all 150 plots.
## [1] TRUE
hist(autotroph$value)
hist(lichens$value)
hist(plants$value)
Although working with a range of 9 years, still many plant species are not present in many plots. This indicates that the assumption of stable autotroph species was correct.
# remove lichens and plants, not used any more
rm(plants) ; rm(lichens); rm(lichenspecies)
# take out autotrophs from spdiv
spdiv <- spdiv[!Trophic_level == "autotroph"]
herbivore <- spdiv[Trophic_level == "herbivore"]
Note : the code below can be deleted as soon as the new diversity dataset is available.
# Arthropod herbivores consist of 3 Groups, 2 of them contain missing data, one not.
# We can only take the plots present in all groups --> reduce the number of plots
unique(herbivore[Year == "2008", Group_fine])
## [1] "Hemiptera" "Coleoptera" "Hymenoptera" "Orthoptera"
length(unique(herbivore[Year == "2008" & Group_fine == "Hymenoptera", Plot]))
## [1] 150
| Dataset | Hemiptera | Coleoptera | Hymenoptera | Orthoptera |
|---|---|---|---|---|
| no Plots | 139 | 139 | 150 | 139 |
herbivore_plotset <- unique(herbivore[Year == "2008" & Group_fine == "Orthoptera", Plot])
# check if the plot set is consistent
sum(herbivore_plotset != unique(herbivore[Year == "2008" & Group_fine == "Hemiptera", Plot])) # all the same
## [1] 0
sum(herbivore_plotset != unique(herbivore[Year == "2008" & Group_fine == "Coleoptera", Plot])) # all the same
## [1] 0
# select the plot set for herbivores
# select all rows which are EITHER from 2008 (arthropods) and from the selected plot list, OR they are from 2017
# species from 2017 don't have to be part of the selected plots
herbivore <- herbivore[Plot %in% herbivore_plotset & Year == "2008" | Year == "2017", ]
rm(herbivore_plotset)
testplotpresence <- unique(herbivore[, .(Year, Plot)])
visdat::vis_miss(data.table::dcast(testplotpresence, Plot ~Year))
## Using 'Plot' as value column. Use 'value.var' to override
rm(testplotpresence)
Snails were only measured in 134 plots. They contain 39 species. Insects were measured in 139 plots, they contain 401 species.
length(unique(herbivore[Year == "2008", Species]))
length(intersect(herbivore[Year == "2008", Plot], herbivore[Year == "2017", Plot]))
Too many missing plots in snails dataset. Take insects dataset only.
herbivore <- herbivore[Year == "2008", .(Species, Plot, value)]
nrow(herbivore) == 139 * 401 # 401 species in 139 plots present
## [1] TRUE
# remove from spdiv
spdiv <- spdiv[!Trophic_level == "herbivore"]
Consists of arthropods dataset from 2008, ants dataset from 2014_2015, and snails dataset from 2017.
omnivore <- spdiv[Trophic_level == "omnivore"]
testplotpresence <- unique(omnivore[, .(Year, Plot)])
vis_miss(data.table::dcast(testplotpresence, Plot ~Year))
## Using 'Plot' as value column. Use 'value.var' to override
rm(testplotpresence)
Many plots are missing.
| subset | arthropods | ants | snails | all | arth&sn |
|---|---|---|---|---|---|
| Year | 2008 | 14 15 | 2017 | all | 08 & 17 |
| no Plots | 139 | 110 | 134 | 97 | 124 |
| no Species | 14 | 20 | 21 | 55 | 35 |
# length(intersect(omnivore[Year == "2008", Plot], omnivore[Year == "2017", Plot]))
# length(intersect(intersect(omnivore[Year == "2008", Plot], omnivore[Year == "2017", Plot]), omnivore[Year == "2014_2015", Plot]))
Only took the arthropod dataset, as snails and ants have too many missing plots.
omnivore <- omnivore[Year == "2008", .(Species, Plot, value)]
# omnivore_ant <- omnivore[Year == "2014_2015", .(Species, Plot, value)]
# omnivore_snail <- omnivore[Year == "2017", .(Species, Plot, value)]
nrow(omnivore) == 139 * 14 # expected number of rows present
## [1] TRUE
spdiv <- spdiv[!Trophic_level == "omnivore"]
#TODO : many plots do not contain any species - take out?
test <- aggregate(value ~ Plot, omnivore, sum)
barplot(height = test$value)
abline(h = mean(test$value), col = "red")
abline(h = median(test$value), col = "green")
print(test$Plot[which(test$value == 0)])
## [1] "AEG03" "AEG04" "AEG07" "AEG08" "AEG09" "AEG11" "AEG12" "AEG13"
## [9] "AEG18" "AEG20" "AEG21" "AEG22" "AEG25" "AEG26" "AEG27" "AEG28"
## [17] "AEG30" "AEG31" "AEG32" "AEG33" "AEG34" "AEG35" "AEG36" "AEG38"
## [25] "AEG39" "AEG40" "AEG44" "AEG47" "AEG48" "AEG49" "HEG01" "HEG02"
## [33] "HEG03" "HEG04" "HEG05" "HEG06" "HEG07" "HEG08" "HEG10" "HEG11"
## [41] "HEG12" "HEG14" "HEG15" "HEG16" "HEG17" "HEG19" "HEG21" "HEG23"
## [49] "HEG24" "HEG26" "HEG27" "HEG33" "HEG36" "HEG38" "HEG39" "HEG40"
## [57] "HEG43" "HEG44" "HEG45" "HEG48" "HEG49" "HEG50" "SEG01" "SEG02"
## [65] "SEG03" "SEG04" "SEG05" "SEG06" "SEG07" "SEG08" "SEG09" "SEG10"
## [73] "SEG11" "SEG12" "SEG13" "SEG14" "SEG15" "SEG17" "SEG18" "SEG19"
## [81] "SEG20" "SEG21" "SEG22" "SEG23" "SEG24" "SEG25" "SEG26" "SEG27"
## [89] "SEG31" "SEG32" "SEG33" "SEG34" "SEG35" "SEG36" "SEG37" "SEG38"
## [97] "SEG39" "SEG41" "SEG42" "SEG43" "SEG44" "SEG47" "SEG48" "SEG49"
## [105] "SEG50"
#TODO : take out for the moment
trlevels <- trlevels[-which(trlevels == "omnivore")]
rm(omnivore)
2008 dataset from
secondary.consumer <- spdiv[Trophic_level == "secondary.consumer"]
testplotpresence <- unique(secondary.consumer[, .(Year, Plot)])
vis_miss(data.table::dcast(testplotpresence, Plot ~Year))
## Using 'Plot' as value column. Use 'value.var' to override
rm(testplotpresence)
Only 2008 dataset has some missing values
| Year | 2008 | 2011 |
|---|---|---|
| no Plots | 139 | 150* |
| no Species | 170 | 4 |
Species set does not overlap.
# length(unique(secondary.consumer[Year == "2008", Species]))
any(unique(secondary.consumer[Year == "2008", Species]) %in% unique(secondary.consumer[Year == "2011", Species]))
Select the Plot set from 2008 and merge the datasets together.
secondary.consumer <- secondary.consumer[Plot %in% unique(unique(secondary.consumer[Year == "2008", Plot])), .(Species, Plot, value)]
nrow(secondary.consumer) == 4 * 139 + 170 * 139 # expected number of rows present
## [1] TRUE
spdiv <- spdiv[!Trophic_level == "secondary.consumer"]
tertiary.consumer <- spdiv[Trophic_level == "tertiary.consumer"]
testplotpresence <- unique(tertiary.consumer[, .(Year, Plot)])
vis_miss(data.table::dcast(testplotpresence, Plot ~Year))
## Using 'Plot' as value column. Use 'value.var' to override
rm(testplotpresence)
| Year | 2008 | 2009 | 2010 | 2011 | 2012 |
|---|---|---|---|---|---|
| subset | birds | birds bats | birds bats | birds | birds |
| no Plot | 150 | 150+149 | 150+150 | 150 | 150 |
| no Species | 67 | 78= 67+11 | 78= 67+11 | 67 | 67 |
# unique(tertiary.consumer[Year == "2012", Group_fine])
# length(unique(tertiary.consumer[Year == "2012", Plot]))
# length(intersect(unique(tertiary.consumer[Year == "2009", Species]), unique(tertiary.consumer[Year == "2010", Species])))
batspecies <- setdiff(unique(tertiary.consumer[Year == "2009", Species]), unique(tertiary.consumer[Year == "2011", Species]))
The Bird and Bat species of the different years are the same.
Take average of Species occurrence and check how often species occur. We assume that birds and bats are difficult to measure, therefore having a large time window makes sense. A species is considered to be present in a plot, if it is present at least in one year.
birds <- aggregate(value ~ Species + Plot, tertiary.consumer[Group_fine == "Birds"], function(x) mean(x, na.rm=T))
birds <- data.table::data.table(birds)
# nrow(birds) == 67 * 150 # TRUE : 67 species in 150 plots
hist(birds$value, main="Presence of bird species among years")
# If species is present at least in one year, take it as present
birds[value > 0, value := 1]
nrow(birds) == 67 * 150 # expected number of rows present (67 species in 150 plots)
## [1] TRUE
bats <- aggregate(value ~ Species + Plot, tertiary.consumer[Group_fine == "Bats" & Species %in% batspecies], function(x) mean(x, na.rm=T))
bats <- data.table::data.table(bats)
# nrow(bats) == 11 * 150
hist(bats$value)
# If species is present at least in one year, take it as present
bats[value > 0, value := 1]
nrow(bats) == 11 * 150 # expected number of rows present (11 species in 150 plots)
## [1] TRUE
# combine the bird and bat dataset
tertiary.consumer <- rbind(birds, bats)
nrow(tertiary.consumer) == 150 * (11 + 67)
## [1] TRUE
rm(bats); rm(birds); rm(batspecies)
spdiv <- spdiv[!Trophic_level == "tertiary.consumer"]
bacteria.RNA <- spdiv[Trophic_level == "bacteria.RNA"]
testplotpresence <- unique(bacteria.RNA[, .(Year, Plot)])
vis_miss(data.table::dcast(testplotpresence, Plot ~ Year))
## Using 'Plot' as value column. Use 'value.var' to override
rm(testplotpresence)
Not visible in the plot, but there are only 148 plots for both years.
Check if OTU names are the same in both years. No obvious overlap.
length(setdiff(unique(bacteria.RNA[Year == "2014", Species]), unique(bacteria.RNA[Year == "2011", Species])))
# length(unique(bacteria.RNA$Species))
# The OTU names prefix is either bac11 or bac14 ...
# grep("01ece679a6294aadf97b931ce39a539a", bacteria.RNA$Species)
bacteria.RNA[grep("01ece679a6294aadf97b931ce39a539a", bacteria.RNA$Species), Year]
The OTUs can be compared across the years. However, we will only work with the 2011 dataset in order to be more consistent with the years. Replace the prefix by bac_ instead of bac11_ and bac14_
# In case the years would be combined
bacteria.RNA[grep("bac11_", Species), Species := gsub("bac11_", "bac14_", Species)]
| year | 2011 | 2014 |
|---|---|---|
| no Plots | 148 | 148 |
| no OTUs | 140510 | 139691 |
length(unique(bacteria.RNA[Year == "2014", Species])) # 139691
length(setdiff(unique(bacteria.RNA[Year == "2011", Species]), unique(bacteria.RNA[Year == "2014", Species]))) # 34435 # 35254
length(intersect(unique(bacteria.RNA[Year == "2011", Species]), unique(bacteria.RNA[Year == "2014", Species]))) # 105256
2014 : 34435 OTUs unique to 2014, 105256 overlapping of total 139691. ~ 1/4 to 3/4 2011 : 35254 OTUs unique to 2011, 105256 overlapping of total 140510. ~ 1/4 to 3/4
grid::grid.newpage()
VennDiagram::draw.pairwise.venn(area1 = 34435 + 105256, area2 = 35254 + 105256, cross.area = 105256, category = c("2011 OTU", "2014 OTU"))
## (polygon[GRID.polygon.2421], polygon[GRID.polygon.2422], polygon[GRID.polygon.2423], polygon[GRID.polygon.2424], text[GRID.text.2425], text[GRID.text.2426], text[GRID.text.2427], text[GRID.text.2428], text[GRID.text.2429])
Select 2011 dataset.
bacteria.RNA <- bacteria.RNA[Year == "2011", .(Species, Plot, value)]
# The bacteria RNA dataset had been reduced: all species-plot combinations with 0 were removed.
# reconstruct them (code from Caterina)
bacteria.RNA <- data.table::setDT(bacteria.RNA)[data.table::CJ(Species=Species,Plot=Plot,unique=T), on=.(Species, Plot)]
bacteria.RNA[is.na(value), value := 0 ]
# # add NA values for the two missing plots
# bac_missing <- rbind(data.table::data.table(Species = unique(bacteria.RNA[Plot == "AEG21", Species]), Plot = "AEG33", value = NA),
# data.table::data.table(Species = unique(bacteria.RNA[Plot == "AEG21", Species]), Plot = "AEG34", value = NA))
nrow(bacteria.RNA) == 148 * 140510 # expected number of plots present
## [1] TRUE
spdiv <- spdiv[!Trophic_level == "bacteria.RNA"]
2011, 2011_2012, 2017 datasets
protist.bacterivore <- spdiv[Trophic_level == "protist.bacterivore"]
testplotpresence <- unique(protist.bacterivore[, .(Year, Plot)])
vis_miss(data.table::dcast(testplotpresence, Plot ~Year))
## Using 'Plot' as value column. Use 'value.var' to override
rm(testplotpresence)
Only 1 plot missing, in 2011
Are the OTU the same across years? Datasets 2011 and 2017 are comparable with each others, but not with dataset 2011_2012. The name appendices for the latter need to be preserved, the other prefixes can be removed.
protist.bacterivore[grep("_protist24426", Species), Species := gsub("_protist24426", "", Species)] # remove appendix from 2011
protist.bacterivore[grep("_protist24466", Species), Species := gsub("_protist24466", "", Species)] # remove appendix from 2017
length(setdiff(unique(protist.bacterivore[Year == "2011", Species]), unique(protist.bacterivore[Year == "2011_2012", Species])))
length(intersect(unique(protist.bacterivore[Year == "2017", Species]), unique(protist.bacterivore[Year == "2011_2012", Species])))
# length(unique(protist.bacterivore[Year == "2011", Species]))
# length(unique(protist.bacterivore[Year == "2011_2012", Species]))
| year | 2011 | 2011_2012 | 2017 | 2011 2017 | 2017 2011_2012 |
|---|---|---|---|---|---|
| no Plots | 149 | 150 | 150 | 149 | 149 |
| no OTUs | 856 | 233 | 863 | 833+23+30 | 0 |
No overlap between 2011 and 2011_2012 and 2017 and 2011_2012.
For BetaDivMultifun, only year 2011 is chosen :
protist.bacterivore <- protist.bacterivore[Year == "2011", .(Species, Plot, value)]
nrow(protist.bacterivore) == 149 * 856 # TRUE : The expected number of rows is present.
## [1] TRUE
spdiv <- spdiv[!Trophic_level == "protist.bacterivore"]
protist.eukaryvore <- spdiv[Trophic_level == "protist.eukaryvore"]
testplotpresence <- unique(protist.eukaryvore[, .(Year, Plot)])
vis_miss(data.table::dcast(testplotpresence, Plot ~Year))
## Using 'Plot' as value column. Use 'value.var' to override
rm(testplotpresence)
Only 1 plot missing, in 2011.
# code for OTU renaming, in case they would like to be combined.
protist.eukaryvore[grep("_protist24426", Species), Species := gsub("_protist24426", "", Species)] # clean 2011
protist.eukaryvore[grep("_protist24466", Species), Species := gsub("_protist24466", "", Species)] # clean 2017
# interchange intersect with setdiff and modify years to obtain values in table below
length(setdiff(unique(protist.eukaryvore[Year == "2017", Species]), unique(protist.eukaryvore[Year == "2011", Species])))
length(unique(protist.eukaryvore[Year == "2011", Plot]))
| Year | 2011 | 2017 | both |
|---|---|---|---|
| no Plots | 149 | 150 | 149 |
| no OTUs | 211 | 210 | 204+7+6 |
For BetaDivMultifun, only year 2011 is chosen :
protist.eukaryvore <- protist.eukaryvore[Year == "2011", .(Species, Plot, value)]
nrow(protist.eukaryvore) == 149 * 211 # TRUE : The expected number of rows is present.
## [1] TRUE
spdiv <- spdiv[!Trophic_level == "protist.eukaryvore"]
protist.omnivore <- spdiv[Trophic_level == "protist.omnivore"]
testplotpresence <- unique(protist.omnivore[, .(Year, Plot)])
vis_miss(data.table::dcast(testplotpresence, Plot ~Year))
## Using 'Plot' as value column. Use 'value.var' to override
rm(testplotpresence)
Only 1 plot missing, in 2011.
# code for enaming of OTUs in case years would be combined.
protist.omnivore[grep("_protist24426", Species), Species := gsub("_protist24426", "", Species)] # clean 2011
protist.omnivore[grep("_protist24466", Species), Species := gsub("_protist24466", "", Species)] # clean 2017
# interchange intersect with setdiff and modify years to obtain values in table below
length(intersect(unique(protist.omnivore[Year == "2011", Species]), unique(protist.omnivore[Year == "2017", Species])))
length(unique(protist.omnivore[Year == "2017", Species]))
| Year | 2011 | 2017 | both |
|---|---|---|---|
| no Plots | 149 | 150 | 149 |
| no OTUs | 440 | 434 | 430+10+4 |
Only Year 2011 is chosen.
protist.omnivore <- protist.omnivore[Year == "2011", .(Species, Plot, value)]
nrow(protist.omnivore) == 149 * 440 # expected number of rows is present
## [1] TRUE
spdiv <- spdiv[!Trophic_level == "protist.omnivore"]
protist.plant.parasite <- spdiv[Trophic_level == "protist.plant.parasite"]
testplotpresence <- unique(protist.plant.parasite[, .(Year, Plot)])
vis_miss(data.table::dcast(testplotpresence, Plot ~Year))
## Using 'Plot' as value column. Use 'value.var' to override
rm(testplotpresence)
Only 1 plot missing, in 2011.
# Code for OTU renaming, in case years would be combined.
protist.plant.parasite[grep("_protist24426", Species), Species := gsub("_protist24426", "", Species)] # clean 2011
protist.plant.parasite[grep("_protist24466", Species), Species := gsub("_protist24466", "", Species)] # clean 2017
# interchange intersect with setdiff and modify years to obtain values in table below
length(setdiff(unique(protist.plant.parasite[Year == "2017", Species]), unique(protist.plant.parasite[Year == "2011", Species])))
length(unique(protist.plant.parasite[Year == "2017", Plot]))
| Year | 2011 | 2017 | both |
|---|---|---|---|
| no Plots | 149 | 150 | 149 |
| no OTUs | 96 | 95 | 95+1+0 |
Only 2011 is chosen
protist.plant.parasite <- protist.plant.parasite[Year == "2011", .(Species, Plot, value)]
nrow(protist.plant.parasite) == 149 * 96
## [1] TRUE
spdiv <- spdiv[!Trophic_level == "protist.plant.parasite"]
Years 2011, 2014, 2017
soilfungi.decomposer <- spdiv[Trophic_level == "soilfungi.decomposer"]
testplotpresence <- unique(soilfungi.decomposer[, .(Year, Plot)])
vis_miss(data.table::dcast(testplotpresence, Plot ~Year))
## Using 'Plot' as value column. Use 'value.var' to override
rm(testplotpresence)
Check if OTU need renaming and check overlap of OTUs
length(unique(soilfungi.decomposer[Year == "2017", Species]))
length(setdiff(unique(soilfungi.decomposer[Year == "2017", Species]), unique(soilfungi.decomposer[Year == "2014", Species])))
length(intersect(intersect(unique(soilfungi.decomposer[Year == "2011", Species]), unique(soilfungi.decomposer[Year == "2017", Species])), unique(soilfungi.decomposer[Year == "2014", Species])))
| Year | 2011 | 2014 | 2017 | 11 14 | 11 17 | 14 17 | all |
|---|---|---|---|---|---|---|---|
| no Plots | 150 | 150 | 150 | 150 | 150 | 150 | 150 |
| no OTUs | 12342 | 10772 | 10971 | 9403+2939+1369 | 9368+2974+1603 | 8391+2381+2580 | 7420+(see left) |
Only year 2011 is chosen
soilfungi.decomposer <- soilfungi.decomposer[Year == "2011", .(Species, Plot, value)]
# adding the missing plot - species combinations . all species-plot combinations with 0 had been removed to store memory
# reconstruct them (code from Caterina)
soilfungi.decomposer <- data.table::setDT(soilfungi.decomposer)[data.table::CJ(Species=Species,Plot=Plot,unique=T), on=.(Species, Plot)]
soilfungi.decomposer[is.na(value), value := 0 ]
nrow(soilfungi.decomposer) == 150 * 12342 # TRUE : now, all expected rows are present
## [1] TRUE
spdiv <- spdiv[!Trophic_level == "soilfungi.decomposer"]
soilfungi.pathotroph <- spdiv[Trophic_level == "soilfungi.pathotroph"]
testplotpresence <- unique(soilfungi.pathotroph[, .(Year, Plot)])
vis_miss(data.table::dcast(testplotpresence, Plot ~Year))
## Using 'Plot' as value column. Use 'value.var' to override
rm(testplotpresence)
No missing plots
# code to fill table below
length(unique(soilfungi.pathotroph[Year == "2017", Species]))
length(setdiff(unique(soilfungi.pathotroph[Year == "2017", Species]), unique(soilfungi.pathotroph[Year == "2011", Species])))
length(intersect(intersect(unique(soilfungi.pathotroph[Year == "2011", Species]), unique(soilfungi.pathotroph[Year == "2017", Species])), unique(soilfungi.pathotroph[Year == "2014", Species])))
| Year | 2011 | 2014 | 2017 | 11 14 | 11 17 | 14 17 | all |
|---|---|---|---|---|---|---|---|
| no Plots | 150 | 150 | 150 | 150 | 150 | 150 | 150 |
| no OTUs | 4097 | 6313 | 2924 | 2852+1245+3461 | 2852+1245+304 | 2134+4179+790 | 1934+(see left) |
Only 2011 data is used here.
soilfungi.pathotroph <- soilfungi.pathotroph[Year == "2011", .(Species, Plot, value)]
# all plot-species combinations with 0 had been removed to save memory. They are added back now. (code from Caterina)
soilfungi.pathotroph <- data.table::setDT(soilfungi.pathotroph)[data.table::CJ(Species=Species,Plot=Plot,unique=T), on=.(Species, Plot)]
soilfungi.pathotroph[is.na(value), value := 0 ]
nrow(soilfungi.pathotroph) == 150 * 4097 # TRUE : all expected rows present
## [1] TRUE
spdiv <- spdiv[!Trophic_level == "soilfungi.pathotroph"]
soilfungi.symbiont <- spdiv[Trophic_level == "soilfungi.symbiont"]
testplotpresence <- unique(soilfungi.symbiont[, .(Year, Plot)])
vis_miss(data.table::dcast(testplotpresence, Plot ~Year))
## Using 'Plot' as value column. Use 'value.var' to override
rm(testplotpresence)
No missing plots
length(unique(soilfungi.symbiont[Year == "2017", Species]))
length(setdiff(unique(soilfungi.symbiont[Year == "2017", Species]), unique(soilfungi.symbiont[Year == "2014", Species])))
length(intersect(intersect(unique(soilfungi.symbiont[Year == "2011", Species]), unique(soilfungi.symbiont[Year == "2017", Species])), unique(soilfungi.symbiont[Year == "2014", Species])))
| Year | 2011 | 2014 | 2017 | 11 14 | 11 17 | 14 17 | all |
|---|---|---|---|---|---|---|---|
| no Plots | 150 | 150 | 150 | 150 | 150 | 150 | 150 |
| no OTUs | 1416 | 1478 | 1445 | 1093+323+385 | 1057+359+388 | 1080+398+365 | 914+(see left) |
Only 2011 is chosen.
soilfungi.symbiont <- soilfungi.symbiont[Year == "2011", .(Species, Plot, value)]
# adding back missing plot-species combinations (code from Caterina)
soilfungi.symbiont <- data.table::setDT(soilfungi.symbiont)[data.table::CJ(Species=Species,Plot=Plot,unique=T), on=.(Species, Plot)]
soilfungi.symbiont[is.na(value), value := 0 ]
nrow(soilfungi.symbiont) == 150 * 1416 # all expected combinations present
## [1] TRUE
spdiv <- spdiv[!Trophic_level == "soilfungi.symbiont"]
belowground.herbivore <- spdiv[Trophic_level == "belowground.herbivore", .(Species, Plot, value)]
spdiv <- spdiv[!Trophic_level == "belowground.herbivore"]
# testplotpresence <- unique(belowground.herbivore[, .(Year, Plot)])
# vis_miss(data.table::dcast(testplotpresence, Plot ~Year))
# rm(testplotpresence)
nrow(belowground.herbivore) == 150 * 11 # expected number of rows present
## [1] TRUE
All plots are present, only one year (simple situation - no plot is shown).
hist(belowground.herbivore$value, main="Presence of belowground herbivores, 11 species", sub="The plot indicates that most species - Plot combinations are not present. This is a first indicator for relatively high (acceptable high) beta-diversity.", xlab = "")
bh_overv <- aggregate(value ~ Plot, belowground.herbivore, sum)
barplot(height = bh_overv$value)
abline(h = mean(bh_overv$value), col = "red")
abline(h = median(bh_overv$value), col = "green")
Most plots contain 3 species. 5 Plots do not contain ANY species! Need to be removed from the dataset.
#TODO : exclude plots with zero species until we find a solution.
# Problem : betadiversity can't be calculated on plots without species.
zeroplots <- bh_overv$Plot[which(bh_overv$value == 0)]
belowground.herbivore <- belowground.herbivore[!Plot %in% zeroplots]
rm(zeroplots) ; rm(bh_overv)
Calculate betadiversity to check wether plots differ in their species composition / turnover.
beta.belowground.herbivore <- prepare_for_betapair(belowground.herbivore)
beta.belowground.herbivore <- betapart::beta.pair(beta.belowground.herbivore, index.family = "sorensen")
corrplot::corrplot(as.matrix(beta.belowground.herbivore$beta.sor),type="lower", method="color", diag=F, mar=c(1,0,1,0), number.cex=0.4, tl.srt = 45, tl.col = "black", tl.cex = 0.5)
corrplot::corrplot(as.matrix(beta.belowground.herbivore$beta.sim),type="lower", method="color", diag=F, mar=c(1,0,1,0), number.cex=0.4, tl.srt = 45, tl.col = "black", tl.cex = 0.5)
corrplot::corrplot(as.matrix(beta.belowground.herbivore$beta.sne),type="lower", method="color", diag=F, mar=c(1,0,1,0), number.cex=0.4, tl.srt = 45, tl.col = "black", tl.cex = 0.5)
There is definitely some betadiversity among the plots.
nrow(belowground.herbivore) == 145 * 11 # expected number of plots present
## [1] TRUE
# cleaning
rm(beta.belowground.herbivore)
belowground.predator <- spdiv[Trophic_level == "belowground.predator"]
nrow(belowground.predator) == 17 * 150 # expected number of plots present
## [1] TRUE
bp_overv <- aggregate(value ~ Plot, belowground.predator, sum)
barplot(height = bp_overv$value)
abline(h = mean(bp_overv$value), col = "red")
abline(h = median(bp_overv$value), col = "green")
There is 1 plot in belowground predators which does not contain any species.
#TODO : find out how to handle plots without species
zeroplots <- bp_overv$Plot[which(bp_overv$value == 0)]
belowground.predator <- belowground.predator[!Plot %in% zeroplots]
rm(zeroplots); rm(bp_overv)
spdiv <- spdiv[!Trophic_level == "belowground.predator"]
calc betadiversity and plot
beta.belowground.predator <- prepare_for_betapair(belowground.predator)
beta.belowground.predator <- betapart::beta.pair(beta.belowground.predator, index.family = "sorensen")
corrplot::corrplot(as.matrix(beta.belowground.predator$beta.sor),type="lower", method="color", diag=F, mar=c(1,0,1,0), number.cex=0.4, tl.srt = 45, tl.col = "black", tl.cex = 0.5)
corrplot::corrplot(as.matrix(beta.belowground.predator$beta.sim),type="lower", method="color", diag=F, mar=c(1,0,1,0), number.cex=0.4, tl.srt = 45, tl.col = "black", tl.cex = 0.5)
corrplot::corrplot(as.matrix(beta.belowground.predator$beta.sne),type="lower", method="color", diag=F, mar=c(1,0,1,0), number.cex=0.4, tl.srt = 45, tl.col = "black", tl.cex = 0.5)
nrow(belowground.predator) == 149 * 17 # expected number of rows present
## [1] TRUE
# cleaning
rm(beta.belowground.predator)
decomposer <- spdiv[Trophic_level == "decomposer", .(Species, Plot, value)]
# testplotpresence <- unique(decomposer[, .(Year, Plot)])
# vis_miss(data.table::dcast(testplotpresence, Plot ~Year))
# rm(testplotpresence)
bp_overv <- aggregate(value ~ Plot, decomposer, sum)
barplot(height = bp_overv$value)
abline(h = mean(bp_overv$value), col = "red")
abline(h = median(bp_overv$value), col = "green")
#TODO HERE! There are so many plots without decomposers - no idea if it still worth it... Include them or not?
length(unique(decomposer$Species)) # even though 40 species, there are so few present.
## [1] 40
# sum(bp_overv$value == 0) # 81 plots have species, 58 don't, total 139.
#TODO : take out for the moment, until decision
trlevels <- trlevels[-which(trlevels == "decomposer")]
spdiv <- spdiv[!Trophic_level == "decomposer"]
rm(decomposer)
plant.pathogen <- spdiv[Trophic_level == "plant.pathogen"]
spdiv <- spdiv[!Trophic_level == "plant.pathogen"]
bp_overv <- aggregate(value ~ Plot, plant.pathogen, sum)
barplot(height = bp_overv$value)
abline(h = mean(bp_overv$value), col = "red")
abline(h = median(bp_overv$value), col = "green")
remove missing plots
plant.pathogen <- plant.pathogen[!Plot %in% bp_overv$Plot[which(bp_overv$value == 0)]]
# fill in missing combinations
plant.pathogen <- data.table::setDT(plant.pathogen)[data.table::CJ(Species=Species,Plot=Plot,unique=T), on=.(Species, Plot)]
plant.pathogen[is.na(value), value := 0 ]
nrow(plant.pathogen) == 84 * 148
## [1] TRUE
pollinator <- spdiv[Trophic_level == "pollinator"]
spdiv <- spdiv[!Trophic_level == "pollinator"]
bp_overv <- aggregate(value ~ Plot, pollinator, sum)
barplot(height = bp_overv$value)
abline(h = mean(bp_overv$value), col = "red")
abline(h = median(bp_overv$value), col = "green")
pollinator <- pollinator[!Plot %in% bp_overv$Plot[which(bp_overv$value == 0)]]
# fill in missing combinations
pollinator <- data.table::setDT(pollinator)[data.table::CJ(Species=Species,Plot=Plot,unique=T), on=.(Species, Plot)]
pollinator[is.na(value), value := 0 ]
nrow(pollinator) == 705 * 144
## [1] TRUE
# cleaning
rm(bp_overv)
144 Plots with 705 species.
Find subset of plots without missing diversity data and store.
Is there any plot with zero species?
for(t in trlevels){
print(t)
tr <- get(t)
test <- aggregate(value ~ Plot, tr, sum)
print(test$Plot[which(test$value == 0)])
}
There is no more plot with zero species in the dataset.
Select the plots without missing information and store.
plots <- list()
for(t in trlevels){
tr <- get(t)
plots[[t]] <- unique(tr$Plot)
}
common.plots <- Reduce(intersect,plots) #128 plots
saveRDS(common.plots, file = "plotNAset.rds")
redplotset <- c()
for(t in trlevels){
redplotset[t] <- length(Reduce(intersect, plots[-which(names(plots) == t)]))
}
par(mar = c(10, 4.1, 4.1, 2.1))
barplot(redplotset, ylim = c(0, 140), las = 3, cex.names = 0.8, main = "Does removal of given trophic level increase number of plots?")
abline(h = 128, col = "darkgreen")
Inspect common plot set
paste("AEG :", length(grep("A", common.plots)), " HEG :", length(grep("H", common.plots)), " SEG :", length(grep("S", common.plots)), sep = "")
| region | no. plots |
|---|---|
| all | 128 |
| AEG | 40 |
| HEG | 43 |
| SEG | 45 |
Only keep the selected plots
for(t in trlevels){
tr <- get(t)
tr <- tr[Plot %in% common.plots, ]
assign(t, tr)
}
for(t in trlevels){
tr <- get(t)
b.tr <- prepare_for_betapair(tr)
b.tr <- betapart::beta.pair(b.tr, index.family = "sorensen")
assign(t, b.tr)
}
rm(spdiv); rm(test); rm(tr); rm(t); rm(redplotset); rm(plots); rm(b.tr)